library(tidyverse)
library(ggplot2)
library(cregg)
library(cjoint)
library(readstata13)
library(data.table)
#plot theme
plot_theme =  theme_bw()+ theme(text = element_text(size=10), 
                                panel.grid.major = element_blank(),
                                panel.grid.minor = element_blank(),
                                panel.grid.major.y = element_line(colour = "#CCCCCC"),
                                axis.line = element_line(colour = "black"),
                                strip.background=element_rect(colour=NA, fill="#DDDDDD"),
                                legend.title=element_blank(), 
                                legend.position="top",
                                axis.title=element_text(size=14,face="bold"), 
                                axis.text=element_text(size=12),
                                strip.text.x = element_text(size = 12),
                                legend.text=element_text(size=12), 
                                axis.text.x = element_text(angle = 45, hjust = 1, vjust=1)) 
#plot features
pd<-position_dodge(.2)
pd_text<-position_dodge(1)

#number of runs for boot strap 
runs = 3000

##Read in data (assuming WD set to same directory as data)
results_data <- read.dta13("final_w1w2.dta") %>% filter(is.na(wave1_only))


#format
results_data<- results_data %>% 
  mutate(country = factor(country, levels=c("United States", "Germany", "A country outside the United States", "China"))) %>%
  mutate(mark_up_cat = case_when(
    mark_up == "1" ~ "100", 
    mark_up == "0.25" ~ "25", 
    mark_up == "0.5" ~ "50",
    mark_up == "0" ~ "0")) %>% 
  mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100")))


#bin our ethnocentrism measure 
results_data <- results_data %>%
  mutate(ethno_factor = case_when(
    ethnocentrism <= .375 ~ "Low ethnocentrism", 
    ethnocentrism > .375 & ethnocentrism < .625  ~ "Medium ethnocentrism", 
    ethnocentrism >= .625  ~ "High ethnocentrism", 
  )) %>% mutate(ethno_factor = as.factor(ethno_factor))

######
# Figure 1 and in text estimates 
######

# Left Panel - Marginal Means across whole sample

#specify our design 
c_design <- profile_chosen ~ mark_up_cat + rating_cat + country 

# marginal means
mm_full <- cj(results_data, c_design, id = ~id, estimate = "mm")
mm_full$BY = "Full sample"
mm_full_plot  <- mm_full %>% filter(feature == "country") %>% 
  select(BY, estimate, lower, upper, statistic, level ) 


### from manuscript: 
# The marginal mean probability of selection is about .67 (95% CI: .65, .68) for U.S. goods, 
#.53 (95% CI:.51, .54) for goods made in Germany, and .33 (95% CI: .31, .34) for goods made in China. 

## MM US
mm_full %>% filter(level == "United States") %>% select(statistic, level, estimate, lower, upper) %>% mutate(across(3:5, round, 2))
## MM Germany 
mm_full %>% filter(level == "Germany") %>% select(statistic, level, estimate, lower, upper) %>% mutate(across(3:5, round, 2))
## MM China 
mm_full %>% filter(level == "China") %>% select(statistic, level, estimate, lower, upper) %>% mutate(across(3:5, round, 2))


### marginal means  -- by ethno
mm_ethno_bins <- cj(results_data, c_design, id = ~id, by = ~ ethno_factor, estimate = "mm")
mm_ethno_bins_plot  <- mm_ethno_bins %>% filter(feature == "country" & BY != "Medium ethnocentrism") %>% 
  select(BY, estimate, lower, upper, statistic, level ) 




# Center and Right Panel - AMCE and MTWP across whole sample - takes a long time to run.
data_for_cjoint <- results_data %>%  dplyr::select(profile_chosen, mark_up_cat, rating_cat, country, id, task_num, product_cat, ethno_factor) %>%
  drop_na() %>%
  mutate(country = relevel(country, ref="United States")) %>%
  mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))
results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$id)
set.seed(1234)


for (run in 1:runs) {
  # sample with replacement from a vector as long as our dataset.
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country, data=bootSample, respondent.id = "id")
  
  #store estimates 
  country <-  as.tibble(t(results_vector[[run]]$estimates$country), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  #store the estimates
  estimates_results[[run]] <- bind_rows(country, markup) %>% mutate(run = run)
  #clear memory 
  rm(country, markup)
}

# combine results from the bootstrap 
full_sample_results <- bind_rows(estimates_results) 
# clean out 
rm(estimates_results, results_vector)

# Disaggregated by markup 
# get the markup coeffs to joint back into the dataset
full_sample_markups <- full_sample_results %>% filter(grepl('markup', est)) %>% 
  select(est, AMCE, run) %>%
  rename(markup_est = AMCE, markup = est)

# put the results together for plotting 
full_sample_results_summary <-  full_sample_results %>% 
  filter(grepl('country', est)) %>%  
  left_join(full_sample_markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = AMCE/(markup_est/as.numeric(markup_num))) %>%
  group_by(est) %>%
  summarise(mean_amce = mean(AMCE), 
            lower_amce =quantile(AMCE, probs=c(.025)), 
            upper_amce = quantile(AMCE, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975))) %>%
  mutate(ethno_factor = "Full sample") %>%
  rename(main = est)

# Sample-wide AMCE estimates for Figure 1 (center panel)
amce_full_sample_results <- full_sample_results_summary %>% select(main, mean_amce, lower_amce, upper_amce, ethno_factor) %>%
  rename(mean = mean_amce, upper = upper_amce, lower=lower_amce) %>% mutate(analysis = "AMCE")
# Sample-wide MWTP estimates for Figure 1 (right panel)
mwtp_full_sample_results <- full_sample_results_summary %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")

mwtp_full_sample_results


###############################
# Estimates by level of ethnocentrism 
###############################

results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$id)
set.seed(1234)

for (run in 1:runs) {
  # sample with replacement from a vector as long as our dataset. 
  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  
  #sample
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "id")
  
  #store estimates 
  country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
  country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
  ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
  rm(country_ethno,country,ethno,markup)
}

#combine results 
results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )

# pull out the markup estimates
markups <- results %>% filter(grepl('markup', main)) %>% 
  select(main, AMCE, run) %>%
  rename(markup_est = AMCE, markup = main)

# pull out estimates for high ethnocentrism  
high_ethno <- results %>% filter(grepl('country', main) & is.na(conditional)) %>% 
  mutate(ethno_factor = "High ethnocentrism") %>%
  select(`Conditional Estimate`, ethno_factor, run, main) %>%
  rename(estimate = `Conditional Estimate` )

# pull out estimates for low ethnocentrism 
low_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
  mutate(ethno_factor = "Low ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()
  
## Binned ethnocentrism results, averaging over the markups
full_results <- bind_rows(high_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
  group_by(main, ethno_factor) %>%
  summarise(mean_amce = mean(estimate), 
            lower_amce =quantile(estimate, probs=c(.025)), 
            upper_amce = quantile(estimate, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975)))


amce_results <- full_results %>% select(main, mean_amce, lower_amce, upper_amce, ethno_factor) %>%
  rename(mean = mean_amce, upper = upper_amce, lower=lower_amce) %>% mutate(analysis = "AMCE")

mwtp_results <- full_results %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")

# Combine all results for the plot 
binned_results <- bind_rows(amce_results, mwtp_results, amce_full_sample_results, mwtp_full_sample_results) %>%
  rename(estimate = mean, BY = ethno_factor, statistic = analysis, level = main)

# Labels for each panel
analysis_labels <- c("Marginal mean\n(relative to all other countries)", "AMCE\n(relative to United States)", "Implied MWTP\n(relative to United States)")

# Combine data for center and right panel with data for the left panel
for_plot <- bind_rows(binned_results, mm_full_plot, mm_ethno_bins_plot ) %>%
  mutate(level = str_remove(level, "^country")) %>%
  mutate(level= case_when(level == "AcountryoutsidetheUnitedStates" | level == "A country outside the United States" ~ "Outside the\nUnited States", 
         TRUE ~ level)) %>% 
  mutate(analysis_label = case_when(
           statistic == "AMCE" ~ analysis_labels[2], 
           statistic == "MWTP" ~analysis_labels[3],
           statistic == "mm" ~ analysis_labels[1], 
           TRUE ~ as.character(statistic))) %>% 
  mutate(analysis_label = factor(analysis_label, levels = analysis_labels )) %>%
  mutate(BY = case_when(
    BY == "Full sample" ~ "Sample-wide average", 
    TRUE ~ BY )) %>%
  mutate(BY = factor(BY, levels=c("Sample-wide average", "Low ethnocentrism", "High ethnocentrism"))) %>%
  mutate(level = factor(level, levels = c("China",  "Outside the\nUnited States", "Germany", "United States"))) 

#Build Figure 1
pd_combined<-position_dodge(.75)
combined_plot_short_article <- ggplot(data=for_plot, aes(y=estimate, x=level, group=BY)) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0, position=pd_combined)+
  geom_point(aes(fill=BY, shape=BY),color="black",size=4,position=pd_combined) + 
  xlab("Country") +
  ylab("Estimate") +
  scale_shape_manual(values=c(22,21, 23, 24))+
  facet_wrap(analysis_label~., scales="free") + plot_theme
combined_plot_short_article

####################
## Output Figure 1
####################

ggsave("Figure1.png", combined_plot_short_article, dpi=600, width=14, height=7)



## Estimate differences and CI for MTWP across ethnocentrism levels.
## Differences reported on page 10 of manuscript. 
## 
# calculate mtwp, then average over the markup values...
mtwp_china_high <-high_ethno %>% 
  filter(main == "countryChina") %>%
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) 

mtwp_china_low <-low_ethno %>% 
  filter(main == "countryChina") %>%
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) 

mtwp_outside_high <-high_ethno %>% 
  filter(main == "countryAcountryoutsidetheUnitedStates") %>%
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num)))  

mtwp_outside_low <-low_ethno %>% 
  filter(main == "countryAcountryoutsidetheUnitedStates") %>%
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) 


mtwp_germany_high <-high_ethno %>% 
  filter(main == "countryGermany") %>%
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num)))  

mtwp_germany_low <-low_ethno %>% 
  filter(main == "countryGermany") %>%
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) 


mwtp_china <- as_tibble(mtwp_china_high$mwtp -  mtwp_china_low$mwtp) %>%
  summarise(mean_mwtp = mean(value), 
            lower_mwtp =quantile(value, probs=c(.025)), 
            upper_mwtp = quantile(value, probs=c(.975)))

mwtp_outside <- as_tibble(mtwp_outside_high$mwtp -  mtwp_outside_low$mwtp) %>%
  summarise(mean_mwtp = mean(value), 
            lower_mwtp =quantile(value, probs=c(.025)), 
            upper_mwtp = quantile(value, probs=c(.975)))


mwtp_germany <- as_tibble(mtwp_germany_high$mwtp -  mtwp_germany_low$mwtp) %>%
  summarise(mean_mwtp = mean(value), 
            lower_mwtp =quantile(value, probs=c(.025)), 
            upper_mwtp = quantile(value, probs=c(.975)))
### From manuscript: 
# Among those with low levels of ethnocentrism, a product from the United States would need to cost about 
# 64 percent more than an otherwise identical good produced in China to make respondents indifferent between the two. 
for_plot %>% filter(level == "China" & statistic == "MWTP")

### From manuscript: 
# This quantity is 34.4 (95% CI: 19.6, 51.3) percentage points higher for those with high levels of ethnocentrism. 
mwtp_china %>% mutate(across(1:3, round, 2))
# Products produced in Germany and “outside the United States” yield more modest estimates of MWTP for domestic production, 
# but suggest similar support for our predictions; the difference in MTWP between high and low ethnocentrists is 
# 25.3 (95% CI: 12.4, 39.8) percentage points and 17.7 (95% CI: 5.37, 31.2)percentage points for the respective treatment conditions.
mwtp_outside %>% mutate(across(1:3, round, 2))
mwtp_germany %>% mutate(across(1:3, round, 2))


### From manuscript:
# In our closest comparison to that analysis, we estimate the MWTP for a cell phone screen protector made in the United States 
# relative to one made “outside the United States” to about 39 percent, suggesting that our experiment returns estimates of 
# country-of-origin effects that are not exceptionally far outside the range of similar incentive compatible studies. 
cjoint_results_screen <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country, data=filter(data_for_cjoint, product_cat=="Cell phone screen protector"), respondent.id = "id")

### AMCE on generic other country
country_est_screen <- cjoint_results_screen$estimates$country["AMCE","countryAcountryoutsidetheUnitedStates"]
## implied decline in demand for 1 percentage point increase in price over our three bins. 
markup_25_screen <- cjoint_results_screen$estimates$markupcat["AMCE","markupcat25"]/25
markup_50_screen <- cjoint_results_screen$estimates$markupcat["AMCE","markupcat50"]/50
markup_100_screen <- cjoint_results_screen$estimates$markupcat["AMCE","markupcat100"]/100
avg_over_markup_screen <- (markup_25_screen + markup_50_screen + markup_100_screen)/3
## MWTP for screen protector from US relative to "outside the US"
round(country_est_screen/avg_over_markup_screen, 2)







